# Load libraries
library(data.table)
library(devtools)
library(presto)
library(BiocParallel)
library(glmGamPoi)
library(sctransform)
library(Seurat)
library(tidyverse)
library(miQC)
library(SeuratWrappers)
library(flexmix)
library(SingleCellExperiment)
library(SummarizedExperiment)
library(readxl)
library(fishpond)
library(Matrix)
library(speckle)
library(scater)
library(patchwork)
library(vctrs)
library(alevinQC)
library(harmony)
library(scDblFinder)
library(cellXY)
# Set global options for Seurat v5 objects
options(Seurat.object.assay.version = 'v5')2 Tumor: Normalization, doublet discrimination, integration, clustering
2.1 Set up Seurat workspace
2.2 Load previously saved merged object
merged.18279.tumor <- readRDS("Tumor_scRNA_Part1.rds")2.3 Normalize and scale data
Regress out mitochondrial contribution
merged.18279.tumor <- NormalizeData(merged.18279.tumor)
merged.18279.tumor <- FindVariableFeatures(merged.18279.tumor,
assay = "RNA",
layer = "data",
selection.method = "vst",
nfeatures = 5000)
merged.18279.tumor <- ScaleData(merged.18279.tumor, vars.to.regress = "percent.mt")2.4 Run PCA
merged.18279.tumor <- RunPCA(merged.18279.tumor, npcs = 200, verbose = FALSE)
ElbowPlot(merged.18279.tumor, ndims = 200, reduction = "pca")2.5 Plot unintegrated UMAP
merged.18279.tumor <- RunUMAP(merged.18279.tumor,
reduction = "pca",
dims = 1:50,
reduction.name = "umap.unintegrated")Warning: The default method for RunUMAP has changed from calling Python UMAP via reticulate to the R-native UWOT using the cosine metric
To use Python UMAP via reticulate, set umap.method to 'umap-learn' and metric to 'correlation'
This message will be shown once per session
11:47:33 UMAP embedding parameters a = 0.9922 b = 1.112
11:47:33 Read 31415 rows and found 50 numeric columns
11:47:33 Using Annoy for neighbor search, n_neighbors = 30
11:47:33 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
11:47:37 Writing NN index file to temp file /tmp/RtmpkA48Fy/file287cc87b7f04fc
11:47:37 Searching Annoy index using 1 thread, search_k = 3000
11:47:48 Annoy recall = 100%
11:47:49 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
11:47:51 Initializing from normalized Laplacian + noise (using RSpectra)
11:47:53 Commencing optimization for 200 epochs, with 1386960 positive edges
11:48:07 Optimization finished
DimPlot(merged.18279.tumor, reduction = "umap.unintegrated", group.by = c("Site", "Patient", "Timepoint"))2.6 Call preliminary clusters for the purposes of doublet discrimination
merged.18279.tumor <- FindNeighbors(merged.18279.tumor, dims = 1:50, reduction = "pca")Computing nearest neighbor graph
Computing SNN
merged.18279.tumor <- FindClusters(merged.18279.tumor,
resolution = 0.4,
algorithm = 2)Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 31415
Number of edges: 1246446
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9585
Number of communities: 29
Elapsed time: 4 seconds
DimPlot(merged.18279.tumor, reduction = "umap.unintegrated", label = T)2.7 Identify and remove doublets
This uses raw counts as input
2.7.1 Combine RNA layers
merged.18279.tumor[['RNA']] <- JoinLayers(merged.18279.tumor[['RNA']])2.7.2 Convert seurat to sce and check colData
merged.18279.tumor.sce <- as.SingleCellExperiment(merged.18279.tumor, assay = "RNA")
merged.18279.tumor.sceclass: SingleCellExperiment
dim: 61217 31415
metadata(0):
assays(2): counts logcounts
rownames(61217): 5-8S-rRNA 5S-rRNA ... ZZEF1 ZZZ3
rowData names(0):
colnames(31415): P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA ...
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT
colData names(15): orig.ident nCount_RNA ... seurat_clusters ident
reducedDimNames(2): PCA UMAP.UNINTEGRATED
mainExpName: RNA
altExpNames(0):
colData(merged.18279.tumor.sce)DataFrame with 31415 rows and 15 columns
orig.ident nCount_RNA
<character> <numeric>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT SeuratProject 5288.99
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA SeuratProject 3314.99
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA SeuratProject 4434.99
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG SeuratProject 5171.98
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC SeuratProject 4295.91
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG SeuratProject 526.988
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC SeuratProject 532.000
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC SeuratProject 556.000
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC SeuratProject 541.000
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT SeuratProject 500.000
nFeature_RNA percent.mt
<integer> <numeric>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT 1842 2.76045
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA 1716 4.83561
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA 1806 4.64488
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG 2118 3.59762
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC 2067 10.98089
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG 252 10.05716
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC 310 7.33083
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC 269 9.89209
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC 467 8.59519
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT 398 5.10000
miQC.probability miQC.keep
<numeric> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT 0.166588 keep
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA 0.125028 keep
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA 0.131072 keep
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG 0.168588 keep
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC 0.493406 keep
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG 0.3196529 keep
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC 0.1016414 keep
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC 0.2969933 keep
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC 0.1737173 keep
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT 0.0620316 keep
Patient Site
<character> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT P101 Tumor
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA P101 Tumor
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA P101 Tumor
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG P101 Tumor
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC P101 Tumor
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG P108 Tumor
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC P108 Tumor
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC P108 Tumor
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC P108 Tumor
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT P108 Tumor
Timepoint IpiCohort
<character> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT W00 2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA W00 2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA W00 2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG W00 2.5mgIpi
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC W00 2.5mgIpi
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG PD 5mgIpi
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC PD 5mgIpi
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC PD 5mgIpi
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC PD 5mgIpi
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT PD 5mgIpi
Assay Sample
<character> <character>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG RNA P101_Tumor_W00_2.5mg..
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC RNA P101_Tumor_W00_2.5mg..
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC RNA P108_Tumor_PD_5mgIpi..
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT RNA P108_Tumor_PD_5mgIpi..
RNA_snn_res.0.4 seurat_clusters
<factor> <factor>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT 6 6
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA 22 22
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA 11 11
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG 6 6
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC 8 8
... ... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG 12 12
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC 12 12
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC 12 12
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC 12 12
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT 6 6
ident
<factor>
P101_Tumor_W00_2.5mgIpi_RNA_AGGTCCGGTGACAAAT 6
P101_Tumor_W00_2.5mgIpi_RNA_GATCGCGAGTACGCGA 22
P101_Tumor_W00_2.5mgIpi_RNA_TCACGAAAGAGCCCAA 11
P101_Tumor_W00_2.5mgIpi_RNA_CAACCTCCAGCCTTGG 6
P101_Tumor_W00_2.5mgIpi_RNA_GCTCTGTCACAGTCGC 8
... ...
P108_Tumor_PD_5mgIpi_RNA_ACTTACTAGGAGCGAG 12
P108_Tumor_PD_5mgIpi_RNA_CACCAGGTCGTACGGC 12
P108_Tumor_PD_5mgIpi_RNA_CGACTTCAGTGGAGTC 12
P108_Tumor_PD_5mgIpi_RNA_ATTGGTGTCGTTTTCC 12
P108_Tumor_PD_5mgIpi_RNA_AGCGTCGTCATACGGT 6
2.7.3 Run scDblFinder
Set the dbr.sd very high to better allow thresholds to be set based on misclassification rates per sample
merged.18279.tumor.sce <- scDblFinder(merged.18279.tumor.sce,
samples = "Sample",
dbr.sd = 1,
clusters = "seurat_clusters",
BPPARAM = MulticoreParam(4,RNGseed=123)
)2.7.4 Inspect results
# Look at the classes
table(merged.18279.tumor.sce$seurat_clusters, merged.18279.tumor.sce$scDblFinder.class)
singlet doublet
0 4218 323
1 2529 212
2 2419 113
3 2398 101
4 2218 181
5 2234 6
6 1873 287
7 2036 10
8 1284 155
9 1232 157
10 1095 60
11 938 126
12 907 2
13 563 159
14 454 44
15 393 70
16 383 60
17 355 42
18 278 26
19 262 26
20 200 68
21 200 19
22 51 125
23 0 138
24 81 22
25 81 12
26 33 51
27 16 54
28 5 30
table(merged.18279.tumor.sce$Sample, merged.18279.tumor.sce$scDblFinder.class)
singlet doublet
P101_Tumor_W00_2.5mgIpi_RNA 1028 100
P101_Tumor_W12_2.5mgIpi_RNA 4807 316
P101_Tumor_W20_2.5mgIpi_RNA 1298 99
P103_Tumor_W00_2.5mgIpi_RNA 4635 426
P103_Tumor_W12_2.5mgIpi_RNA 5058 465
P103_Tumor_W20_2.5mgIpi_RNA 4085 468
P104_Tumor_PD_2.5mgIpi_RNA 3572 360
P108_Tumor_PD_5mgIpi_RNA 4253 445
# Look at the scores
summary(merged.18279.tumor.sce$scDblFinder.score) Min. 1st Qu. Median Mean 3rd Qu. Max.
0.0000039 0.0054557 0.0275547 0.1358086 0.1161005 0.9999959
2.7.5 Save doublet classifications into main Seurat object
merged.18279.tumor$doublet_classification <- merged.18279.tumor.sce$scDblFinder.class2.7.6 Count singlets and doublets
table(merged.18279.tumor$doublet_classification)
singlet doublet
28736 2679
table(merged.18279.tumor$doublet_classification, merged.18279.tumor$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 13
singlet 4218 2529 2419 2398 2218 2234 1873 2036 1284 1232 1095 938 907 563
doublet 323 212 113 101 181 6 287 10 155 157 60 126 2 159
14 15 16 17 18 19 20 21 22 23 24 25 26 27
singlet 454 393 383 355 278 262 200 200 51 0 81 81 33 16
doublet 44 70 60 42 26 26 68 19 125 138 22 12 51 54
28
singlet 5
doublet 30
2.7.7 Plot singlets/doublets in UMAP space
DimPlot(merged.18279.tumor,reduction = "umap.unintegrated", group.by = "doublet_classification")2.7.8 Subset object to remove doublets and count remaining cells
merged.18279.tumor.singlets <- subset(merged.18279.tumor, doublet_classification %in% c("singlet"))
dim(merged.18279.tumor.singlets)[1] 61217 28736
# Count remaining cells per initial cluster
table(merged.18279.tumor.singlets$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
4218 2529 2419 2398 2218 2234 1873 2036 1284 1232 1095 938 907 563 454 393
16 17 18 19 20 21 22 23 24 25 26 27 28
383 355 278 262 200 200 51 0 81 81 33 16 5
2.8 Remove cells with very high nCount_RNA values, set other final QC filters
merged.18279.tumor.singlets <- subset(merged.18279.tumor.singlets,
subset = nCount_RNA < 40000 & nCount_RNA > 1500 & nFeature_RNA > 750)
dim(merged.18279.tumor.singlets)[1] 61217 19976
2.9 Re-compute PCA
Re-scale data now that it has been subset
merged.18279.tumor.singlets[["RNA"]] <- split(merged.18279.tumor.singlets[["RNA"]], f = merged.18279.tumor.singlets$Sample)
merged.18279.tumor.singlets <- FindVariableFeatures(merged.18279.tumor.singlets,
assay = "RNA",
layer = "data",
selection.method = "vst",
nfeatures = 5000)
merged.18279.tumor.singlets <- ScaleData(merged.18279.tumor.singlets, vars.to.regress = "percent.mt")
merged.18279.tumor.singlets <- RunPCA(merged.18279.tumor.singlets, npcs = 200, verbose = FALSE)2.10 Run Harmony integration
merged.18279.tumor.singlets <- IntegrateLayers(merged.18279.tumor.singlets,
method = HarmonyIntegration,
orig.reduction = "pca",
new.reduction = "integrated.harmony"
)Warning: HarmonyMatrix is deprecated and will be removed in the future from the
API in the future
Warning: Warning: The parameters do_pca and npcs are deprecated. They will be ignored for this function call and please remove parameters do_pca and npcs and pass to harmony cell_embeddings directly.
This warning is displayed once per session.
Warning: Warning: The parameter tau is deprecated. It will be ignored for this function call and please remove parameter tau in future function calls. Advanced users can set value of parameter tau by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter block.size is deprecated. It will be ignored for this function call and please remove parameter block.size in future function calls. Advanced users can set value of parameter block.size by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.harmony is replaced with parameter max_iter. It will be ignored for this function call and please use parameter max_iter in future function calls.
This warning is displayed once per session.
Warning: Warning: The parameter max.iter.cluster is deprecated. It will be ignored for this function call and please remove parameter max.iter.cluster in future function calls. Advanced users can set value of parameter max.iter.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.cluster is deprecated. It will be ignored for this function call and please remove parameter epsilon.cluster in future function calls. Advanced users can set value of parameter epsilon.cluster by using parameter .options and function harmony_options().
This warning is displayed once per session.
Warning: Warning: The parameter epsilon.harmony is deprecated. It will be ignored for this function call and please remove parameter epsilon.harmony in future function calls. If users want to control if harmony would stop early or not, use parameter early_stop. Advanced users can set value of parameter epsilon.harmony by using parameter .options and function harmony_options().
This warning is displayed once per session.
Transposing data matrix
Using automatic lambda estimation
Initializing state using k-means centroids initialization
Harmony 1/10
Harmony 2/10
Harmony 3/10
Harmony 4/10
Harmony 5/10
Harmony converged after 5 iterations
2.11 Identify clusters after integration across a range of clustering resolutions
merged.18279.tumor.singlets <- FindNeighbors(merged.18279.tumor.singlets, dims = 1:50, reduction = "integrated.harmony")Computing nearest neighbor graph
Computing SNN
merged.18279.tumor.singlets <- FindClusters(merged.18279.tumor.singlets,
resolution = seq(0.1, 2, by=0.1),
algorithm = 2)Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9763
Number of communities: 12
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9563
Number of communities: 16
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9434
Number of communities: 18
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9327
Number of communities: 20
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9228
Number of communities: 25
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9136
Number of communities: 26
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.9045
Number of communities: 28
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8970
Number of communities: 29
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8896
Number of communities: 29
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8826
Number of communities: 30
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8761
Number of communities: 32
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8702
Number of communities: 33
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8650
Number of communities: 34
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8597
Number of communities: 34
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8548
Number of communities: 35
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8499
Number of communities: 36
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8451
Number of communities: 37
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8407
Number of communities: 41
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8369
Number of communities: 43
Elapsed time: 2 seconds
Modularity Optimizer version 1.3.0 by Ludo Waltman and Nees Jan van Eck
Number of nodes: 19976
Number of edges: 792019
Running Louvain algorithm with multilevel refinement...
Maximum modularity in 10 random starts: 0.8329
Number of communities: 42
Elapsed time: 2 seconds
merged.18279.tumor.singlets <- RunUMAP(merged.18279.tumor.singlets,
reduction = "integrated.harmony",
dims = 1:50,
reduction.name = "umap.harmony")12:04:19 UMAP embedding parameters a = 0.9922 b = 1.112
12:04:19 Read 19976 rows and found 50 numeric columns
12:04:19 Using Annoy for neighbor search, n_neighbors = 30
12:04:19 Building Annoy index with metric = cosine, n_trees = 50
0% 10 20 30 40 50 60 70 80 90 100%
[----|----|----|----|----|----|----|----|----|----|
**************************************************|
12:04:21 Writing NN index file to temp file /tmp/RtmpkA48Fy/file287cc85a21cae4
12:04:21 Searching Annoy index using 1 thread, search_k = 3000
12:04:28 Annoy recall = 100%
12:04:29 Commencing smooth kNN distance calibration using 1 thread with target n_neighbors = 30
12:04:31 Initializing from normalized Laplacian + noise (using RSpectra)
12:04:33 Commencing optimization for 200 epochs, with 883792 positive edges
12:04:43 Optimization finished
table(merged.18279.tumor.singlets$seurat_clusters)
0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15
1482 1321 1147 1135 1132 1106 968 945 849 848 783 709 670 538 537 485
16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31
467 444 428 406 399 371 336 306 288 274 229 228 151 147 121 114
32 33 34 35 36 37 38 39 40 41
104 85 82 77 75 63 36 33 30 27
2.12 Plot clusters
# Plot one as example
DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", label = TRUE, group.by = "RNA_snn_res.1")2.13 Plot metadata in UMAP space
DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Patient")DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Site")DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Timepoint")DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "IpiCohort")DimPlot(merged.18279.tumor.singlets,reduction = "umap.harmony", group.by = "Sample") + NoLegend()FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="nCount_RNA",order=T)FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="nFeature_RNA",order=T)FeaturePlot(merged.18279.tumor.singlets,reduction = "umap.harmony",features="percent.mt",order=T)2.14 Save clustered object
saveRDS(merged.18279.tumor.singlets,"Tumor_scRNA_Part2.rds")2.15 Get session info
sessionInfo()R version 4.3.2 (2023-10-31)
Platform: x86_64-pc-linux-gnu (64-bit)
Running under: Rocky Linux 8.10 (Green Obsidian)
Matrix products: default
BLAS/LAPACK: /usr/lib64/libopenblasp-r0.3.15.so; LAPACK version 3.9.0
locale:
[1] LC_CTYPE=en_US.UTF-8 LC_NUMERIC=C
[3] LC_TIME=en_US.UTF-8 LC_COLLATE=en_US.UTF-8
[5] LC_MONETARY=en_US.UTF-8 LC_MESSAGES=en_US.UTF-8
[7] LC_PAPER=en_US.UTF-8 LC_NAME=C
[9] LC_ADDRESS=C LC_TELEPHONE=C
[11] LC_MEASUREMENT=en_US.UTF-8 LC_IDENTIFICATION=C
time zone: America/New_York
tzcode source: system (glibc)
attached base packages:
[1] stats4 stats graphics grDevices utils datasets methods
[8] base
other attached packages:
[1] cellXY_0.99.0 scDblFinder_1.14.0
[3] harmony_1.2.0 alevinQC_1.16.1
[5] vctrs_0.6.5 patchwork_1.3.0
[7] scater_1.30.1 scuttle_1.12.0
[9] speckle_1.0.0 Matrix_1.6-4
[11] fishpond_2.6.2 readxl_1.4.3
[13] SingleCellExperiment_1.24.0 SummarizedExperiment_1.32.0
[15] Biobase_2.62.0 GenomicRanges_1.54.1
[17] GenomeInfoDb_1.38.8 IRanges_2.36.0
[19] S4Vectors_0.40.2 BiocGenerics_0.48.1
[21] MatrixGenerics_1.14.0 matrixStats_1.4.1
[23] flexmix_2.3-19 lattice_0.22-6
[25] SeuratWrappers_0.3.19 miQC_1.8.0
[27] lubridate_1.9.3 forcats_1.0.0
[29] stringr_1.5.1 dplyr_1.1.4
[31] purrr_1.0.2 readr_2.1.5
[33] tidyr_1.3.1 tibble_3.2.1
[35] ggplot2_3.5.1 tidyverse_2.0.0
[37] Seurat_5.1.0 SeuratObject_5.0.2
[39] sp_2.1-4 sctransform_0.4.1
[41] glmGamPoi_1.12.2 BiocParallel_1.36.0
[43] presto_1.0.0 Rcpp_1.0.13-1
[45] devtools_2.4.5 usethis_3.0.0
[47] data.table_1.16.2
loaded via a namespace (and not attached):
[1] fs_1.6.5 spatstat.sparse_3.1-0
[3] bitops_1.0-9 httr_1.4.7
[5] RColorBrewer_1.1-3 profvis_0.4.0
[7] tools_4.3.2 utf8_1.2.4
[9] R6_2.5.1 DT_0.33
[11] lazyeval_0.2.2 uwot_0.2.2
[13] urlchecker_1.0.1 withr_3.0.2
[15] GGally_2.2.1 gridExtra_2.3
[17] progressr_0.15.1 cli_3.6.3
[19] spatstat.explore_3.2-6 fastDummies_1.7.3
[21] labeling_0.4.3 spatstat.data_3.1-4
[23] ggridges_0.5.6 pbapply_1.7-2
[25] Rsamtools_2.18.0 R.utils_2.12.3
[27] parallelly_1.39.0 sessioninfo_1.2.2
[29] limma_3.58.1 RSQLite_2.3.8
[31] BiocIO_1.12.0 generics_0.1.3
[33] gtools_3.9.5 ica_1.0-3
[35] spatstat.random_3.2-2 ggbeeswarm_0.7.2
[37] fansi_1.0.6 abind_1.4-8
[39] R.methodsS3_1.8.2 lifecycle_1.0.4
[41] yaml_2.3.10 edgeR_4.0.16
[43] SparseArray_1.2.2 Rtsne_0.17
[45] blob_1.2.4 grid_4.3.2
[47] dqrng_0.4.1 promises_1.3.0
[49] crayon_1.5.3 shinydashboard_0.7.2
[51] miniUI_0.1.1.1 beachmat_2.18.1
[53] cowplot_1.1.3 KEGGREST_1.42.0
[55] metapod_1.10.1 pillar_1.9.0
[57] knitr_1.45 rjson_0.2.23
[59] xgboost_1.7.8.1 future.apply_1.11.3
[61] codetools_0.2-20 leiden_0.4.3.1
[63] glue_1.8.0 remotes_2.5.0
[65] png_0.1-8 spam_2.11-0
[67] org.Mm.eg.db_3.18.0 cellranger_1.1.0
[69] gtable_0.3.6 cachem_1.1.0
[71] xfun_0.49 S4Arrays_1.2.0
[73] mime_0.12 survival_3.7-0
[75] bluster_1.12.0 statmod_1.5.0
[77] ellipsis_0.3.2 fitdistrplus_1.2-1
[79] ROCR_1.0-11 nlme_3.1-166
[81] bit64_4.5.2 RcppAnnoy_0.0.22
[83] irlba_2.3.5.1 vipor_0.4.7
[85] KernSmooth_2.23-24 DBI_1.2.3
[87] colorspace_2.1-1 nnet_7.3-19
[89] tidyselect_1.2.1 bit_4.5.0
[91] compiler_4.3.2 BiocNeighbors_1.20.2
[93] DelayedArray_0.28.0 plotly_4.10.4
[95] rtracklayer_1.62.0 scales_1.3.0
[97] lmtest_0.9-40 digest_0.6.37
[99] goftest_1.2-3 spatstat.utils_3.1-1
[101] rmarkdown_2.29 RhpcBLASctl_0.23-42
[103] XVector_0.42.0 htmltools_0.5.8.1
[105] pkgconfig_2.0.3 sparseMatrixStats_1.14.0
[107] fastmap_1.2.0 rlang_1.1.4
[109] htmlwidgets_1.6.4 shiny_1.9.1
[111] DelayedMatrixStats_1.24.0 farver_2.1.2
[113] zoo_1.8-12 jsonlite_1.8.9
[115] R.oo_1.27.0 BiocSingular_1.18.0
[117] RCurl_1.98-1.16 magrittr_2.0.3
[119] modeltools_0.2-23 GenomeInfoDbData_1.2.11
[121] dotCall64_1.2 munsell_0.5.1
[123] viridis_0.6.5 reticulate_1.35.0
[125] stringi_1.8.4 zlibbioc_1.48.2
[127] MASS_7.3-60.0.1 org.Hs.eg.db_3.18.0
[129] plyr_1.8.9 pkgbuild_1.4.5
[131] ggstats_0.7.0 parallel_4.3.2
[133] listenv_0.9.1 ggrepel_0.9.6
[135] deldir_2.0-4 Biostrings_2.70.3
[137] splines_4.3.2 tensor_1.5
[139] hms_1.1.3 locfit_1.5-9.10
[141] igraph_2.1.1 spatstat.geom_3.2-8
[143] RcppHNSW_0.6.0 reshape2_1.4.4
[145] ScaledMatrix_1.10.0 pkgload_1.4.0
[147] XML_3.99-0.17 evaluate_1.0.1
[149] scran_1.30.2 BiocManager_1.30.25
[151] tzdb_0.4.0 httpuv_1.6.15
[153] RANN_2.6.2 polyclip_1.10-7
[155] future_1.34.0 scattermore_1.2
[157] rsvd_1.0.5 xtable_1.8-4
[159] restfulr_0.0.15 svMisc_1.2.3
[161] RSpectra_0.16-2 later_1.3.2
[163] viridisLite_0.4.2 AnnotationDbi_1.64.1
[165] GenomicAlignments_1.38.2 memoise_2.0.1
[167] beeswarm_0.4.0 tximport_1.28.0
[169] cluster_2.1.6 timechange_0.3.0
[171] globals_0.16.3